# -*- coding: utf-8 -*-
import urllib,socket,os,codecs

def getValue(Str,s1,start,len1,s2,len2):
    """Return the slice of ``Str`` located between two marker strings.

    The slice begins ``len1`` characters after the position where ``s1`` is
    first found at or after ``start``.  It ends ``len2`` characters after the
    position where ``s2`` is first found at or after that beginning (a
    negative ``len2`` therefore trims characters in front of ``s2``).

    Raises ValueError (from ``index``) when either marker is not found.
    """
    value_from = len1 + Str.index(s1, start)
    value_to = len2 + Str.index(s2, value_from)
    return Str[value_from:value_to]

def getUrls(html):
    """Extract listing URLs from a saved search-result page.

    ``html`` is the path of a file containing the page.  The function reads
    the result count printed on the page and then collects that many link
    targets, each taken from the text between ``class="" href=`` and the
    following ``target`` attribute.

    Returns a list of URL strings, or None when the page contains the
    traffic-verification notice (the request was blocked).

    Raises ValueError when the result count cannot be located/parsed or when
    fewer links than the reported count are present in the file.
    """
    # Read through a context manager so the file handle is always closed.
    with open(html, 'r') as page:
        html_str = page.read()

    # The site answers with a traffic-verification page when the IP is limited.
    if '验证异常流量' in html_str:
        print(u"IP受限")
        return None

    # Number of listings the page reports as found.
    house_num = int(getValue(html_str, "共找到<span>", 0, 16, "</span>", -1))

    link_marker = "class=\"\" href="
    result = []
    start = 0
    # NOTE(review): the loop trusts the reported count; if it is larger than
    # the number of links actually in this file, index() raises ValueError.
    # Confirm whether callers rely on that before relaxing it.
    for _ in range(house_num):
        # Locate the marker once and reuse the position for both the
        # extraction and the advance to the next link.
        marker_pos = html_str.index(link_marker, start)
        # Skip the marker plus the opening quote (15) and drop the closing
        # quote and space that precede "target" (-2).
        result.append(getValue(html_str, link_marker, marker_pos, 15, "target", -2))
        # Step inside the marker just found so the next search moves on.
        start = marker_pos + 10
    return result

def getInfo(url):
    """Download a listing page and extract its attributes.

    The page at ``url`` is saved to ``info.html`` (relative to the current
    working directory), decoded as UTF-8 and scanned with ``getValue``.

    Returns a dict with the keys ``ID``, ``SellTime``, ``name``,
    ``buildTime``, ``Tprice``, ``area``, ``area1``, ``price1``, ``fangben``,
    ``huxing``, ``dianti``, ``chaoxiang``, ``chanquan``, ``zhuangxiu`` and
    ``louceng``; the dict is also printed before being returned.

    Raises ValueError when any of the expected markers is missing from the
    page.
    """
    urllib.urlretrieve(url, "info.html")
    # Context manager guarantees the downloaded file is closed after reading.
    with codecs.open("info.html", 'r', 'utf-8') as f_r:
        html_str = f_r.read()

    # (result key, opening marker, characters to skip from the start of the
    # opening marker, closing marker).  The page text is unicode, so every
    # marker is a unicode literal: searching unicode text for a non-ASCII
    # byte string depends on the interpreter's default encoding.
    fields = (
        # Lianjia listing number
        ("ID", u"链家编号", 30, u"<span class=\"jubao\">"),
        # Date the listing was published
        ("SellTime", u"挂牌时间", 11, u"</li>"),
        # Residential community name
        ("name", u"target=\"_blank\" class", 29, u"</a>"),
        # Construction date
        ("buildTime", u"平米</div><div class=\"subInfo\">", 29, u"</div>"),
        # Total price
        ("Tprice", u"span class=\"total\"", 19, u"</span>"),
        # Building area
        ("area", u"建筑面积</span>", 11, u"㎡"),
        # Interior area
        ("area1", u"套内面积", 11, u"</li>"),
        # Unit price
        ("price1", u"\"unitPriceValue\"", 17, u"<i>"),
        # Age of the property certificate
        ("fangben", u"房本年限", 11, u"</li>"),
        # Layout
        ("huxing", u"房屋户型", 11, u"</li>"),
        # Elevator-to-household ratio
        ("dianti", u"梯户比例", 11, u"</li>"),
        # Orientation
        ("chaoxiang", u"房屋朝向", 11, u"</li>"),
        # Term of the property right
        ("chanquan", u"产权年限", 11, u"</li>"),
        # Decoration state
        ("zhuangxiu", u"装修情况", 11, u"</li>"),
        # Floor
        ("louceng", u"所在楼层", 11, u"</li>"),
    )

    result = {}
    # Every field is searched from the top of the page, in the listed order.
    for key, opening, skip, closing in fields:
        result[key] = getValue(html_str, opening, 0, skip, closing, 0)
    print(result)
    return result


def getInfoChged(url,price):
    """Download a listing page and report how it changed.

    The page at ``url`` is saved to ``info_chg.html`` (relative to the
    current working directory), decoded as UTF-8 and inspected.  ``price`` is
    the previously recorded total price, compared against the total price
    text currently on the page.

    Returns a two-element list ``[status, value]``:
      * ``[3, sold_price]`` - the page carries the sold marker;
      * ``[0, total_price]`` - the page carries the delisted marker;
      * ``[1, total_price]`` - the total price differs from ``price``;
      * ``[2, 0]`` - the total price equals ``price``.

    Raises ValueError when a marker needed for the extraction is missing.
    """
    urllib.urlretrieve(url, "info_chg.html")
    # Context manager guarantees the downloaded file is closed after reading.
    with codecs.open("info_chg.html", 'r', 'utf-8') as f_r:
        html_str = f_r.read()

    # The page text is unicode, so the non-ASCII markers are unicode literals;
    # byte-string markers would depend on the interpreter's default encoding.

    # Sold: the sold price comes from the title block.
    if u"链家成交" in html_str:
        sold_price = getValue(html_str, u"<div class=\"name\"><b>", 0, 21, u" - ", 0)
        return [3, sold_price]

    # Total price currently shown on the page.
    Tprice = getValue(html_str, u"span class=\"total\"", 0, 19, u"</span>", 0)

    # Delisted.
    if u"已下架" in html_str:
        return [0, Tprice]
    # Price changed (any difference, not only an increase).
    if Tprice != price:
        return [1, Tprice]
    # Unchanged.
    return [2, 0]


























